# Shared values forwarded to every distillation method entry below as
# ``alpha_mgd`` / ``lambda_mgd``.
# NOTE(review): presumably the MGD loss weight and masking ratio -- confirm
# against the distiller that consumes this config.
alpha_mgd = 5e-6
lambda_mgd = 0.65

# One entry per hooked layer (17, 20, 23). Student and teacher are hooked at
# the same module path, 'model.<layer>.cv3.act', with ``output_hook=True``;
# only the layer index and the student/teacher channel counts vary between
# entries. A comprehension is used so that no helper names are left behind in
# the module namespace.
distill_cfg = [
    dict(
        student_module=f'model.{layer}.cv3.act',
        teacher_module=f'model.{layer}.cv3.act',
        output_hook=True,
        methods=[
            dict(
                name=f'{layer}_cv3_act',
                student_channels=student_ch,
                teacher_channels=teacher_ch,
                alpha_mgd=alpha_mgd,
                lambda_mgd=lambda_mgd,
            ),
        ],
    )
    # (layer index, student channels, teacher channels)
    for layer, student_ch, teacher_ch in (
        (17, 64, 256),
        (20, 128, 512),
        (23, 256, 1024),
    )
]
# optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
